

*-------------------------------------------------------------------------------
* collect and collapse annual files
*-------------------------------------------------------------------------------


* loop over annual files, one iteration per calendar year
*--------------------------------------------------------

forvalues year = 2012/2021 {
di "YEAR: " "`year'" 


	* In 2019–2021, CPD data was saved separately
	*--------------------------------------------	
	if `year' >= 2019 & `year' <=2021 {		
	cap import delimited "data/inputs/tsss/`year'_ITSS_CPD_redacted.txt", delimiter("~") clear 
	cap import delimited "data/inputs/tsss/`year' Chicago PD--REDACTED Traffic Stop Data.csv", varnames(1) clear
	cap import delimited "data/inputs/tsss/CPD - `year' REDCATED Traffic Data.csv", delimiter(comma) varnames(1) clear 
	gen agencyname = "CHICAGO POLICE"
	
	rename drrace driverrace
	rename drsex driversex
	rename reasstop reasonforstop
	rename vehsrchcond vehiclesearchconducted
	rename drvsrchcond driversearchconducted
	rename passsrchcond passengersearchconducted
	rename dogalertsrch policedogvehiclesearched
	rename resstop resultofstop
	rename typemov typeofmoving
	rename vehcontra vehiclecontrabandfound
	rename vehother vehicleothercontrabandfound
	rename passdrvcontra driverpassengercontrabandfound
	rename passdrvother driverpassengerothercontrabandfo
	rename dogalertsrchcontra policedogcontrabandfound 
	rename dogother policedogothercontrabandfound
	
	rename beat_i beatlocationofstop
	cap tostring beatlocationofstop, replace force
	
	tempfile cpd`year'
	save `cpd`year''
	}
	else {
	}


* insheet raw data from IDOT
*---------------------------
import delimited "data/inputs/tsss/`year' ITSS Data.txt", delimiter("~") clear 


	* add CPD data for 2019–2021
	*---------------------------
	if `year' >= 2019 & `year' <=2021 {	 
		append using `cpd`year''
	}
	else {
	}


* check that data were read in correctly
*---------------------------------------

	/* Note: These Ns come from summary reports
			 on the IDOT website. */

count

assert r(N) == 2095876 if `year' == 2013
assert r(N) == 2043247 if `year' == 2014
assert r(N) == 2022332 if `year' == 2015
assert r(N) == 2169796 if `year' == 2016
assert r(N) == 2284916 if `year' == 2017 /* Note, PDF says  2,284,919; file is missing three stops */
assert r(N) == 2470318 if `year' == 2018 /* Note, PDF says  2,470,322; file is missing four stops */
assert r(N) == 2501409 if `year' == 2019 /* Note, PDF says  2,483,904; file has an extra 17K stops 
												  Extra stops are not in Chicago police or ISP data:
												  File has 595,515 CPD stops, vs. 598,332 in PDF;
												  File has 304,041 ISP stops, same as PDF. */
assert r(N) == 1567602 if `year' == 2020 /* Note, PDF says 1,561,514; file has an extra 6K stops */
assert r(N) == 1655351 if `year' == 2021 /* Note, PDF says 1,655,935; file is missing 584 stops */


* keep CPD, state troopers in Cook County, suburban police
*---------------------------------------------------------
	* Note: https://web.archive.org/web/20201027072407/https://isp.illinois.gov/Patrol/Districts
keep if agencyname == "CHICAGO POLICE" | ///
		(beatlocationofstop == "03" & agencyname == "ILLINOIS STATE POLICE") | ///
		agencyname == "ELGIN POLICE" | agencyname == "SCHAUMBURG POLICE" | ///
		agencyname == "EVANSTON POLICE" | agencyname == "CICERO POLICE" | ///
		agencyname == "ARLINGTON HEIGHTS POLICE" | agencyname == "PALATINE POLICE" | ///
		agencyname == "SKOKIE POLICE" | agencyname == "DES PLAINES POLICE" | ///
		agencyname == "ORLAND PARK POLICE" | agencyname == "OAK LAWN POLICE" | ///
		agencyname == "BERWYN POLICE" | agencyname == "MOUNT PROSPECT POLICE" | ///
		agencyname == "TINLEY PARK POLICE" | agencyname == "OAK PARK POLICE" | ///
		agencyname == "HOFFMAN ESTATES POLICE" | agencyname == "AURORA POLICE" | ///
		agencyname == "JOLIET POLICE" | agencyname == "NAPERVILLE POLICE" | ///
		agencyname == "WAUKEGAN POLICE" | agencyname == "BOLINGBROOK POLICE" | ///
		agencyname == "WHEATON POLICE" | agencyname == "DOWNERS GROVE POLICE"
	

* dates
*------
cap replace dateofstop = datestop if dateofstop == ""
if `year' == 2013 | `year' == 2019 {
gen double date = dofc(clock(dateofstop, "YMDhms")) if agencyname ~= "CHICAGO POLICE"
replace date = date(dateofstop, "MDY") if agencyname == "CHICAGO POLICE"
}
if `year' == 2020 | `year' == 2021 {
gen double date = dofc(clock(dateofstop, "YMDhms")) if agencyname ~= "CHICAGO POLICE"
replace date = date(dateofstop, "MD20Y") if agencyname == "CHICAGO POLICE"
}
if `year' == 2017 {
gen double date = date(dateofstop, "YMD") if agencyname ~= "CHICAGO POLICE"
replace date = date(dateofstop, "MDY") if agencyname == "CHICAGO POLICE"

}
if `year' ~= 2013 & `year' ~= 2017 & `year' < 2019 {
gen double date = date(dateofstop, "MDY") 
}
drop if date == . /* This applies to two observations in 2017 only */
assert `r(N_drop)' < 5
format date %td
assert date >= d(01jan`year') & date <= d(31dec`year')


* indicator variables for counts: race
*-------------------------------------
gen count = 1
gen count_racenonmiss = (driverrace ~= . & driverrace ~= 999)
gen black = (driverrace == 2) 
replace black = . if driverrace == . | driverrace == 999
gen white = (driverrace == 1)
replace white = . if driverrace == . | driverrace == 999
gen hispanic = (driverrace == 4)
replace hispanic = . if driverrace == . | driverrace == 999
gen asian = (driverrace == 5)
replace asian = . if driverrace == . | driverrace == 999
gen pacificislander = (driverrace == 6)
replace pacificislander = . if driverrace == . | driverrace == 999


* indicator variables for counts: reasons
*----------------------------------------
gen citation = (result == 1)
gen black_citation = (black == 1 & citation == 1)
replace black_citation =  . if black == . | citation == .
gen white_citation = (white == 1 & citation == 1)
replace white_citation = . if white == . | citation == .
gen reason_movingviolation = (reasonforstop == 1)
gen reason_equip = (reasonforstop == 2)
gen reason_license = (reasonforstop == 3)
gen reason_commercial = (reasonforstop == 4) /* Note no missings in this category */
gen reason_equip_black = (reasonforstop == 2 & black == 1)
replace reason_equip_black = . if black == .
gen reason_moving_black = (reasonforstop == 1 & black == 1)
replace reason_moving_black = . if black == .
gen reason_equip_white = (reasonforstop == 2 & white == 1)
replace reason_equip_white = . if white == .
gen reason_moving_white = (reasonforstop == 1 & white == 1)
replace reason_moving_white = . if white == .


* collapse to city-agency-beat-month
*-----------------------------------
gen month = mofd(date)
format month %tm 
replace beatlocationofstop = "" if agencyname ~= "CHICAGO POLICE" /* We don't use beats for the other agencies */
collapse (sum) count* black white hispanic asian pacific ///
			   citation black_citation white_citation ///
			   reason_*, ///
			   by(agencyname beatlocationofstop month)


* save
*-----
tempfile tsss`year'
save `tsss`year''


}


* append
*-------
clear 
forvalues year = 2012/2021 {
	di "YEAR: `year'"
	append using `tsss`year''
}


* beat IDs
*---------
replace beatlocationofstop = "0" + beatlocationofstop if ///
	agencyname == "CHICAGO POLICE" & length(beatlocationofstop) == 3


* save the stacked agency-beat-month dataset, overwriting any existing output file
*-----	
save "data/outputs/TSSS_beat.dta", replace


* end

